# -*- coding: utf-8 -*-
"""
Created on Mon Apr 13 18:01:22 2020

@author: wjx
"""
import re
import requests

# Browser-like User-Agent header sent with every request.
headers = {
    'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE'
}

# Pattern for a single '<div class="item">' entry. It captures four groups:
#   num      - text of the <em class=""> element
#   name     - text of the first <span class="title"> element
#   rate     - text of the rating_num <span>
#   rate_num - digits preceding "人评价"
# Raw strings are required so that "\d" reaches the regex engine verbatim
# instead of being treated as an (invalid) string escape sequence.
rel = (
    r'<div class="item">.*?<em class="">(?P<num>.*?)</em>.*?'
    r'<span class="title">(?P<name>.*?)</span>.*?'
    r'<span class="rating_num" property="v:average">(?P<rate>.*?)</span>.*?'
    r'<span>(?P<rate_num>\d*?)人评价</span>'
)
# re.S lets '.' match newlines, since one entry spans several lines of HTML.
# Compiled once here because the pattern does not change between pages.
pattern = re.compile(rel, flags=re.S)

# Accumulates one (num, name, rate, rate_num) tuple of strings per match,
# across all fetched pages.
result = []
for i in range(10):
    # Pages are requested at offsets start=0, 25, ..., 225.
    url = 'https://movie.douban.com/top250?start={}&filter='.format(25*i)
    # A timeout keeps a stalled connection from hanging the script forever.
    html = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on an HTTP error status rather than silently collecting
    # nothing from an error page.
    html.raise_for_status()
    text = html.text

    res = pattern.findall(text)
    result.extend(res)
